#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
#       Replication file (models):                                         #
#       D�r & M�dlhamer (2021)                                             #
#       Power and Innovative Capacity: Explaining Variation in             #
#       Intellectual Property Rights Regulation across Trade Agreements    #
#       International Interactions                                         #
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#


rm(list=ls())
setwd("ADD PERSONAL DIRECTORY") 
sink("replication_output.txt", append=FALSE, split=TRUE, type = c("output", "message"))


# load packages -----------------------------------------------------------


library(pacman)
p_load("tidyverse", "stringi", "countrycode", "prediction", "sampleSelection", 
              "stargazer", "clipr", "reshape2", "gtools",
              "quanteda", "quanteda.textmodels", "quanteda.textstats", "tidytext")


# load data -----------------------------------------------------------


data <- read_csv("replication_data_duer_moedlhamer.csv", col_types = cols(number = col_character()))


# add text analysis data -----------------------------------------------------------
# THIS IS ALREADY INCLUDED IN THE REPLICATION DATA FILE, BUT CAN BE RE-RUN FOR REPLICATION PURPOSES

#source("duer_moedlhamer_replication_LSA.R")
#data <- subset(data, select=-c(lsa1, lsa_pat1, lsa_tra1, lsa_cop1))
#data <- merge(data, d_lsa[,c("number", "lsa1", "lsa_pat1", "lsa_cop1", "lsa_tra1")], all.x=TRUE, by="number")
#write.csv(data, "replication_data_duer_moedlhamer.csv", na="", row.names=FALSE)


# Descriptive data --------------------------------------------------------


#north-south agreements
table(data$northsouth)

#type of agreements
table(data$typememb)

#continents
table(data$regioncon)

#N of countries
dplyr::n_distinct(as.vector(as.matrix(data[,paste0("c",1:91)])), na.rm = TRUE)

#Table A1 (in the Appendix)
data[,c("name","year","ipr_included")] %>% dplyr::arrange(data$year, data$name)

#Table A2 (in the Appendix)
m1 <- melt(data[,c(paste0("c", 1:91))])
m1$country <- countrycode(m1$value, "iso3n", "country.name", custom_match=c("900"="Kosovo"))
table(m1$country)
rm(m1)

#share of agreements with IPR provisions
table(data$ipr_included)
prop.table(table(data$ipr_included))

#N of words
sum(data$iprlength)

#correlations among dependent variables
cor(data[,c("lsa1", "lsa_pat1", "lsa_tra1", "lsa_cop1")], use="pairwise.complete.obs")
cor.test(data$lsa1, data$ipr_depth)
cor.test(data$lsa1, data$ms_ipr_tripsplus_sum)

#export descriptive statistics (Table 1 in paper)
data$gdp_fill_diff_max_ln <- log(data$gdp_fill_diff_max)
data$gdppc_fill_diff_max_ln <- log(data$gdppc_fill_diff_max)
data$gdp_memb_sum_ln <- log(data$gdp_memb_sum)
data$ptalength_ln <- log(data$ptalength)
data$patents_fill_diff_max_ln <- log(data$patents_fill_diff_max)
data$patents_fill_mean_ln <- log(data$patents_fill_mean)
data$patents_share_wipo_diff_max1_ln <- log(data$patents_share_wipo_diff_max1)

stargazer(as.data.frame(data[,c("ipr_included", "lsa1", "lsa_pat1", "lsa_tra1", "lsa_cop1", "depth_index","ms_ipr_tripsplus_sum", 
                  "patents_fill_diff_max_ln", 
                  "patents_share_wipo_diff_max1_ln", "gp_index_diff_max", "rd_spend_fill_diff_max", 
                  "gdp_fill_diff_max_ln", 
                  "emerging", "gdppc_fill_diff_max_ln", "polity_fill_mean","wto_memb", "ptalength_ln", 
                  "gdp_memb_sum_ln", "roll_over_agreements", "patents_fill_mean_ln", "top_economies", "tbt_prov")]),
          covariate.labels = c("IPR included", "IPR depth", "IPR depth (Pat.)", "IPR depth (Marks)", "IPR depth (Copy.)", 
                               "DESTA IPR","TRIPS+", "Patents (diff., log.)", 
                               "Patents share (diff., log.)", "IP protection (diff.)", "R\\&D spending (diff.)", 
                               "GDP (diff., log.)", "BRIC countries", "GDPpc (diff., log.)", 
                               "Democracy (mean)", "WTO member", "Word count (PTA, log.)", 
                               "GDP (sum, log.)", "Roll-over agreements", "Patents (mean, log.)", 
                               "Big3 economies", "TBT provision"),
          digits=2, median=TRUE, type = "latex", label="tab_desc", font.size = "footnotesize", 
          title = "Descriptive statistics", omit.summary.stat=c("p25", "p75"), out="table1_descriptivestatistics.tex") 
write_last_clip()

#UK roll-over agreements
table(data$roll_over_agreements)


# empirical analysis ------------------------------------------------------


## no covariates model (Table A 5 in the Appendix) ------------------------------------------------------


summary(mh0 <- heckit(ipr_included ~ gdp_fill_diff_max_ln * patents_fill_diff_max_ln + tbt_prov + NULL,
                      lsa1 ~ gdp_fill_diff_max_ln * patents_fill_diff_max_ln  + NULL, 
                      data=data, method="2step"))


## baseline model (Table 2 in paper) ------------------------------------------------------


mh1 <- heckit(ipr_included ~ 
                log(gdp_fill_diff_max) * log(patents_fill_diff_max) +
                emerging + 
                log(gdppc_fill_diff_max) + 
                polity_fill_mean + 
                wto_memb + 
                log(ptalength) + 
                year_count +
                tbt_prov +
                NULL,
              lsa1 ~
                log(gdp_fill_diff_max) * log(patents_fill_diff_max) +
                emerging + 
                log(gdppc_fill_diff_max) + 
                polity_fill_mean + 
                wto_memb + 
                log(ptalength) + 
                year_count +
                NULL, 
              data=data, method="2step")
summary(mh1)


## different dependent variables ------------------------------------------------------


#with lsa_pat1
summary(mh2 <- heckit(ipr_included ~ log(gdp_fill_diff_max) * log(patents_fill_diff_max) + emerging + log(gdppc_fill_diff_max) + polity_fill_mean + wto_memb + log(ptalength) + year_count + tbt_prov  + NULL, 
                      outcome = lsa_pat1 ~ log(gdp_fill_diff_max) * log(patents_fill_diff_max) + emerging + log(gdppc_fill_diff_max) + polity_fill_mean + wto_memb + log(ptalength) + year_count + NULL, 
                      data = data, method = "2step"))

#with lsa_tra1
summary(mh3 <- heckit(ipr_included ~ log(gdp_fill_diff_max) * log(patents_fill_diff_max) + emerging + log(gdppc_fill_diff_max) + polity_fill_mean + wto_memb + log(ptalength) + year_count + tbt_prov + NULL, 
                      outcome = lsa_tra1 ~ log(gdp_fill_diff_max) * log(patents_fill_diff_max) + emerging + log(gdppc_fill_diff_max) + polity_fill_mean + wto_memb + log(ptalength) + year_count + NULL, 
                      data = data, method = "2step"))

#with lsa_cop1
summary(mh4 <- heckit(ipr_included ~ log(gdp_fill_diff_max) * log(patents_fill_diff_max) + emerging + log(gdppc_fill_diff_max) + polity_fill_mean + wto_memb + log(ptalength) + year_count + tbt_prov + NULL, 
                      outcome = lsa_cop1 ~ log(gdp_fill_diff_max) * log(patents_fill_diff_max) + emerging + log(gdppc_fill_diff_max) + polity_fill_mean + wto_memb + log(ptalength) + year_count + NULL, 
                      data = data, method = "2step"))

#with DESTA IPR measure
summary(mh5 <- heckit(ipr_included ~ log(gdp_fill_diff_max) * log(patents_fill_diff_max) + emerging + log(gdppc_fill_diff_max) + polity_fill_mean + wto_memb + log(ptalength) + year_count + tbt_prov + NULL, 
                      outcome = ipr_depth ~ log(gdp_fill_diff_max) * log(patents_fill_diff_max) + emerging + log(gdppc_fill_diff_max) + polity_fill_mean + wto_memb + log(ptalength) + year_count + NULL, 
                      data = data, method = "2step"))

#with Morin/Surbeck TRIPS+ measure
summary(mh6 <- heckit(ipr_included ~ log(gdp_fill_diff_max) * log(patents_fill_diff_max) + emerging + log(gdppc_fill_diff_max) + polity_fill_mean + wto_memb + log(ptalength) + year_count + tbt_prov + NULL, 
                      outcome = ms_ipr_tripsplus_sum ~ log(gdp_fill_diff_max) * log(patents_fill_diff_max) + emerging + log(gdppc_fill_diff_max) + polity_fill_mean + wto_memb + log(ptalength) + log(ptalength) + year_count + NULL, 
                      data = data, method = "2step"))


## varying operationalization of predictors ------------------------------------------------------


#with patents_share_wipo_diff_max1
summary(mh7 <- heckit(ipr_included ~ log(gdp_fill_diff_max) * log(patents_share_wipo_diff_max1+1) + emerging + log(gdppc_fill_diff_max) + polity_fill_mean + wto_memb + log(ptalength) + year_count + tbt_prov + NULL, 
                      outcome = lsa1 ~ log(gdp_fill_diff_max) * log(patents_share_wipo_diff_max1+1) + emerging + log(gdppc_fill_diff_max) + polity_fill_mean + wto_memb + log(ptalength) + year_count + NULL, 
                      data = data, method = "2step"))

#with gp_index_diff_max
summary(mh8 <- heckit(ipr_included ~ log(gdp_fill_diff_max) * gp_index_diff_max + emerging + log(gdppc_fill_diff_max) + polity_fill_mean + wto_memb + log(ptalength) + year_count + tbt_prov + NULL, 
                      outcome = lsa1 ~ log(gdp_fill_diff_max) * gp_index_diff_max + emerging + log(gdppc_fill_diff_max) + polity_fill_mean + wto_memb + log(ptalength) + year_count + NULL, 
                      data = data, method = "2step"))

#with rd_spend_fill_diff_max
summary(mh9 <- heckit(ipr_included ~ log(gdp_fill_diff_max) * rd_spend_fill_diff_max + emerging + log(gdppc_fill_diff_max) + polity_fill_mean + wto_memb + log(ptalength) + year_count + tbt_prov + NULL, 
                      outcome = lsa1 ~ log(gdp_fill_diff_max) * rd_spend_fill_diff_max + emerging + log(gdppc_fill_diff_max) + polity_fill_mean + wto_memb + log(ptalength) + year_count + NULL, 
                      data = data, method = "2step"))

#with gdp_fill_diff_mean and patents_fill_diff_mean
summary(mh10 <- heckit(ipr_included ~ log(gdp_fill_diff_mean) * log(patents_fill_diff_mean) + emerging + log(gdppc_fill_diff_max) + polity_fill_mean + wto_memb + log(ptalength) + year_count + tbt_prov + NULL, 
                       outcome = lsa1 ~ log(gdp_fill_diff_mean) * log(patents_fill_diff_mean) + emerging + log(gdppc_fill_diff_max) + polity_fill_mean + wto_memb + log(ptalength) + year_count + NULL, 
                       data = data, method = "2step"))


## other check ------------------------------------------------------


#dropping agreements with small large inverted
data_0 <- data[data$gdp_patents_fill_diff>0,]
summary(mh11 <- heckit(ipr_included ~ log(gdp_fill_diff_max) * log(patents_fill_diff_max) + emerging + log(gdppc_fill_diff_max) + polity_fill_mean + wto_memb + log(ptalength) + year_count + tbt_prov + NULL, 
                       outcome = lsa1 ~ log(gdp_fill_diff_max) * log(patents_fill_diff_max) + emerging + log(gdppc_fill_diff_max) + polity_fill_mean + wto_memb + log(ptalength) + year_count + NULL, 
                       data = data_0, method = "2step"))


## additional controls ------------------------------------------------------


#with log(gdp_memb_sum), typememb and UK roll over agreements as further controls
summary(mh12 <- heckit(ipr_included ~ log(gdp_fill_diff_max) * log(patents_fill_diff_max) + emerging + log(gdppc_fill_diff_max) + polity_fill_mean + wto_memb + log(ptalength) + year_count + log(gdp_memb_sum) + as.factor(typememb) + roll_over_agreements + tbt_prov + NULL, 
                      outcome = lsa1 ~ log(gdp_fill_diff_max) * log(patents_fill_diff_max) + emerging + log(gdppc_fill_diff_max) + polity_fill_mean + wto_memb + log(ptalength) + year_count + log(gdp_memb_sum)+ as.factor(typememb) + roll_over_agreements + NULL, 
                      data = data, method = "2step"))

#with patents_fill_mean as further control
summary(mh13 <- heckit(ipr_included ~ log(gdp_fill_diff_max) * log(patents_fill_diff_max) + emerging + log(gdppc_fill_diff_max) + polity_fill_mean + wto_memb + log(ptalength) + year_count + log(patents_fill_mean) + tbt_prov + NULL, 
                       outcome = lsa1 ~ log(gdp_fill_diff_max) * log(patents_fill_diff_max) + emerging + log(gdppc_fill_diff_max) + polity_fill_mean + wto_memb + log(ptalength) + year_count + log(patents_fill_mean) + NULL, 
                       data = data, method = "2step"))

#with top economies as further control
summary(mh14 <- heckit(ipr_included ~ log(gdp_fill_diff_max) * log(patents_fill_diff_max) + emerging + log(gdppc_fill_diff_max) + polity_fill_mean + wto_memb + log(ptalength) + year_count + top_economies + tbt_prov + NULL, 
                      outcome = lsa1 ~ log(gdp_fill_diff_max) * log(patents_fill_diff_max) + emerging + log(gdppc_fill_diff_max) + polity_fill_mean + wto_memb + log(ptalength) + year_count + top_economies + NULL, 
                      data = data, method = "2step"))

#both normal interaction plus north-south times gdp_diff interaction
summary(mh15 <- heckit(ipr_included ~ log(gdp_fill_diff_max) * log(patents_fill_diff_max) + emerging + log(gdppc_fill_diff_max) + polity_fill_mean + wto_memb + log(ptalength) + year_count + log(gdp_fill_diff_max)*as.factor(northsouth) + tbt_prov + NULL, 
                      outcome = lsa1 ~ log(gdp_fill_diff_max) * log(patents_fill_diff_max) + emerging + log(gdppc_fill_diff_max) + polity_fill_mean + wto_memb + log(ptalength) + year_count + log(gdp_fill_diff_max)*as.factor(northsouth) + NULL, 
                      data = data, method = "2step"))


## English language PTAs only model ------------------------------------------------------


datana <- data[!is.na(data$patents_fill_diff_max),]
datana <- datana[!is.na(datana$polity_fill_mean),]
datana_engl <- subset(datana, language == "English")
mh16 <- heckit(ipr_included ~ 
                   log(gdp_fill_diff_max) * log(patents_fill_diff_max) +
                   emerging + 
                   log(gdppc_fill_diff_max) + 
                   polity_fill_mean + 
                   wto_memb + 
                   log(ptalength) + 
                   year_count +
                   tbt_prov +
                   NULL,
               lsa1 ~
                   log(gdp_fill_diff_max) * log(patents_fill_diff_max) +
                   emerging + 
                   log(gdppc_fill_diff_max) + 
                   polity_fill_mean + 
                   wto_memb + 
                   log(ptalength) + 
                   year_count +
                   NULL, 
               data=datana_engl, method="2step")
summary(mh16)

#with lsa_pat1
summary(mh16a <- heckit(ipr_included ~ log(gdp_fill_diff_max) * log(patents_fill_diff_max) + emerging + log(gdppc_fill_diff_max) + polity_fill_mean + wto_memb + log(ptalength) + year_count + tbt_prov  + NULL, 
                        outcome = lsa_pat1 ~ log(gdp_fill_diff_max) * log(patents_fill_diff_max) + emerging + log(gdppc_fill_diff_max) + polity_fill_mean + wto_memb + log(ptalength) + year_count + NULL, 
                        data = datana_engl, method = "2step"))
#with lsa_tra1
summary(mh16b <- heckit(ipr_included ~ log(gdp_fill_diff_max) * log(patents_fill_diff_max) + emerging + log(gdppc_fill_diff_max) + polity_fill_mean + wto_memb + log(ptalength) + year_count + tbt_prov + NULL, 
                        outcome = lsa_tra1 ~ log(gdp_fill_diff_max) * log(patents_fill_diff_max) + emerging + log(gdppc_fill_diff_max) + polity_fill_mean + wto_memb + log(ptalength) + year_count + NULL, 
                        data = datana_engl, method = "2step"))
#with lsa_cop1
summary(mh16c <- heckit(ipr_included ~ log(gdp_fill_diff_max) * log(patents_fill_diff_max) + emerging + log(gdppc_fill_diff_max) + polity_fill_mean + wto_memb + log(ptalength) + year_count + tbt_prov + NULL, 
                        outcome = lsa_cop1 ~ log(gdp_fill_diff_max) * log(patents_fill_diff_max) + emerging + log(gdppc_fill_diff_max) + polity_fill_mean + wto_memb + log(ptalength) + year_count + NULL, 
                        data = datana_engl, method = "2step"))


# Export models -----------------------------------------------------------


## Main results (Table 2 in paper)
#this is a trick to export both parts of model via stargazer
mh1x <- mh1
mh1x$param$index$betaO <- mh1$param$index$betaS
mh1x$param$index$betaS <- mh1$param$index$betaO

stargazer(mh1x, mh1, mh2, mh3, mh4,          
          column.labels = c("Selection eq.","Outcome eq. (Overall)","Outcome eq. (Pat.)","Outcome eq. (Marks)","Outcome eq. (Copy.)"),
          digits=2, 
          type = "latex", 
          header = FALSE,
          order=c(1:2, 10, 3:9, 11),
          covariate.labels=c("GDP (diff., log.)",
                             "Patents (diff., log.)",
                             "Patents (diff., log.) x GDP (diff., log.)", 
                             "BRIC countries",
                             "GDPpc (diff., log.)",
                             "Democracy (mean)",
                             "WTO member",
                             "Word count (PTA, log.)",
                             "Year (count)",
                             "TBT provision"),
          dep.var.labels.include = FALSE,
          title = "Main results",
          label = "tab_selection",
          single.row = FALSE,
          font.size = "footnotesize",
          dep.var.caption = "",
          omit.stat = c("f","ser", "chi2"),
          no.space = TRUE,
          out="table2_mainresults.tex")
write_last_clip()


## Robustness checks (I) (Table A7 in the Appendix)
stargazer(mh5, mh6, mh7, mh8, mh9, mh10,         
          column.labels = c("Outcome eq. (DESTA IPR)", "Outcome eq. (TRIPS+)", "Outcome eq. (Patents weighted)", "Outcome eq. (IP protection)", "Outcome eq. (RD spend.)", "Outcome eq. (Mean diff.)"), 
          digits=2, 
          type = "latex", 
          header = FALSE,
          order=c(1:5, 14:18, 6:13, 19),
          covariate.labels=c("GDP (diff., log.)",
                             "Patents (diff., log.)",
                             "Patents share (diff., log.)",
                             "IP protection (diff.)",
                             "R&D spending (diff.)",
                             "Patents (diff., log.) x GDP (diff., log.)", 
                             "Patents share (diff., log.) x GDP (diff., log.)",
                             "IP protection (diff.) x GDP (diff., log.)",
                             "R&D spending (diff.) x GDP (diff., log.)",
                             "Patents (diff. mean, log.) x GDP (diff. mean, log.)", 
                             "GDP (diff. mean, log.)",
                             "Patents (diff. mean, log.)",
                             "BRIC countries",
                             "GDPpc (diff., log.)",
                             "Democracy (mean)",
                             "WTO member",
                             "Word count (PTA, log.)",
                             "Year (count)",
                             "Constant"),
          dep.var.labels.include = FALSE,
          title = "Robustness checks (I)",
          label = "tab_rob1",
          single.row = FALSE,
          font.size = "footnotesize",
          dep.var.caption = "",
          omit.stat = c("f","ser", "chi2"),
          no.space = TRUE,
          out="tableA7_robustnesschecksI.tex")


## Robustness checks (II) (Table A8 in the Appendix)
stargazer(mh11, mh12, mh13, mh14, mh15,
          column.labels = c("Outcome eq. (Small-large)", "Outcome eq. (Add. controls)", "Outcome eq. (Patents min)", "Outcome eq. (Top econ.)", "Outcome eq. (North-South)"), 
          digits=2, 
          type = "latex", 
          header = FALSE,
          order=c(1:2, 18, 3:17, 19, 20),
          covariate.labels=c("GDP (diff., log.)",
                             "Patents (diff., log.)",
                             "Patents (diff., log.) x GDP (diff., log.)", 
                             "BRIC countries",
                             "GDPpc (diff., log.)",
                             "Democracy (mean)",
                             "WTO member",
                             "Word count (PTA, log.)",
                             "Year (count)",
                             "GDP (sum, log.)", 
                             "Plurilateral",
                             "Region-country",
                             "Region-region",
                             "UK roll-over agreements",
                             "Patents (mean, log.)",
                             "Big3 economies",
                             "North-South",
                             "South-South",
                             "North-South x GDP (diff., log.)",
                             "North-South x GDP (diff., log.)"),
          dep.var.labels.include = FALSE,
          title = "Robustness checks (II)",
          label = "tab_rob2",
          single.row = FALSE,
          font.size = "footnotesize",
          dep.var.caption = "",
          omit.stat = c("f","ser", "chi2"),
          no.space = TRUE,
          out="tableA8_robustnesschecksII.tex")


## Main results (no controls) (Table A5 in the Appendix) 
mh0x <- mh0
mh0x$param$index$betaO <- mh0$param$index$betaS
mh0x$param$index$betaS <- mh0$param$index$betaO

stargazer(mh0x, mh0,
          column.labels = c("Selection eq.", "Outcome eq."),
          digits=2,
          type = "latex",
          header = FALSE,
          order=c(1:2, 4, 3, 5),                  
           covariate.labels=c("GDP (diff., log.)",
                              "Patents (diff., log.)",
                              "Patents (diff., log.) x GDP (diff., log.)",
                              "TBT provision",
                            "Constant"),
          dep.var.labels.include = FALSE,
          title = "Main results (no controls)",
          label = "tab_null",
          single.row = FALSE,
          font.size = "footnotesize",
          dep.var.caption = "",
          omit.stat = c("f","ser", "chi2"),
          no.space = TRUE,
          out="tableA5_nocontrol.tex")


## English PTAs only - translated PTAs excluded (Table A6 in the Appendix)
mh16x <- mh16
mh16x$param$index$betaO <- mh16$param$index$betaS
mh16x$param$index$betaS <- mh16$param$index$betaO

stargazer(mh16x, mh16, mh16a, mh16b, mh16c,        
          column.labels = c("Selection eq.", "Outcome eq. (Overall)", "Outcome eq. (Pat.)", "Outcome eq. (Marks)", "Outcome eq. (Copy.)"),
          digits=2, 
          type = "latex", 
          header = FALSE,
          order=c(1:2, 10, 3:9, 11),
          covariate.labels=c("GDP (diff., log.)",
                             "Patents (diff., log.)",
                             "Patents (diff., log.) x GDP (diff., log.)", 
                             "BRIC countries",
                             "GDPpc (diff., log.)",
                             "Democracy (mean)",
                             "WTO member",
                             "Word count (PTA, log.)",
                             "Year (count)",
                             "TBT provision"),
          dep.var.labels.include = FALSE,
          title = "Main results (English PTAs only)",
          label = "tab_selection_engl",
          single.row = FALSE,
          font.size = "footnotesize",
          dep.var.caption = "",
          omit.stat = c("f","ser", "chi2"),
          no.space = TRUE,
          out="tableA6_english_only.tex")


# plotting interaction from main model ------------------------------------


summary(mh1_s <- probit(ipr_included ~ 
                          log(gdp_fill_diff_max) * log(patents_fill_diff_max) +
                          emerging + 
                          log(gdppc_fill_diff_max) + 
                          polity_fill_mean + 
                          wto_memb + 
                          log(ptalength) + 
                          year_count +
                          tbt_prov +
                          NULL,
                        data=datana))
datana$IMR <- invMillsRatio(mh1_s)$IMR1

summary(mh1_o <- lm(lsa1 ~
                      gdp_fill_diff_max_ln * patents_fill_diff_max_ln +
                      emerging + 
                      log(gdppc_fill_diff_max) + 
                      polity_fill_mean + 
                      wto_memb + 
                      log(ptalength) + 
                      year_count +
                      IMR +
                      NULL, 
                    data = datana[datana$ipr_included == 1, ]))

pr1 <- prediction(mh1_o, at = list(gdp_fill_diff_max_ln = quantile(datana$gdp_fill_diff_max_ln, na.rm=TRUE), 
                                   patents_fill_diff_max_ln = quantile(datana$patents_fill_diff_max_ln, probs=c(0.05, 0.95), na.rm=TRUE)), calculate_se = TRUE)
summary(pr1)
pr2 <- as.data.frame(summary(pr1))
names(pr2)[1:2] <- c("gdp_fill_diff_max", "patents_fill_diff_max_ln")
quantiles <- c("5%", "95%")
pr2$patents_quantiles <- rep(quantiles, each=5)
pr2$gdp_rounded <- round(pr2$gdp_fill_diff_max, 0)
(p1 <- ggplot(pr2[pr2$patents_quantiles%in%quantiles,], aes(x=gdp_fill_diff_max, 
                                                            y=Prediction, 
                                                            color=factor(patents_quantiles, levels=quantiles))) +
    geom_line() +
    ylab("Prediction (LSA value)") +
    scale_color_manual(name="Patents (diff):", values=c("blue", "red")) +
    scale_x_continuous("GDP difference (logged)", breaks=quantile(datana$gdp_fill_diff_max_ln, na.rm=TRUE), 
                       labels=c("Minimum", "1st\nQuartile", "Median", "3rd\nQuartile", "Maximum")) + 
    geom_ribbon(aes(ymin=lower,ymax=upper), linetype=2, alpha=0.1) +	
    theme_bw(base_size=14) +
    theme(legend.position = "none",
          axis.text.x=element_text(colour="black"),
          axis.text.y=element_text(colour="black")) +
    annotate(geom="text", x=3, y=-19, label="Patents (diff.) (95%)",
             color="red", size=5) +
    annotate(geom="text", x=3, y=38, label="Patents (diff.) (5%)",
             color="blue", size=5) +
    NULL)

#Figure 1 in the paper
ggsave("int_plot_heckman1.pdf", p1, device="pdf", dpi=600)


# with categorical GDP variable (see Figure A1 in the Appendix) -------------------------------------------


datana$gdp_fill_diff_max_cat <- gtools::quantcut(datana$gdp_fill_diff_max, q = 3, na.rm = TRUE)

summary(mh2_s <- probit(ipr_included ~ 
                            gdp_fill_diff_max_cat * log(patents_fill_diff_max) +
                            emerging + 
                            log(gdppc_fill_diff_max) + 
                            polity_fill_mean + 
                            wto_memb + 
                            log(ptalength) + 
                            year_count +
                            tbt_prov +
                            NULL,
                        data=datana))
datana$IMR <- invMillsRatio(mh1_s)$IMR1

summary(mh2_o <- lm(lsa1 ~
                        gdp_fill_diff_max_cat * patents_fill_diff_max_ln +
                        emerging + 
                        log(gdppc_fill_diff_max) + 
                        polity_fill_mean + 
                        wto_memb + 
                        log(ptalength) + 
                        year_count +
                        IMR +
                        NULL, 
                    data = datana[datana$ipr_included == 1, ]))

pr2 <- prediction(mh2_o, at = list(gdp_fill_diff_max_cat = factor(c("[0.072,93]", "(93,912]", "(912,1.92e+04]")), 
                                   patents_fill_diff_max_ln = quantile(datana$patents_fill_diff_max_ln, probs=c(0.05, 0.95), na.rm=TRUE)), calculate_se = TRUE)
(pr2 <- as.data.frame(summary(pr2)))
names(pr2)[1:2] <- c("gdp_fill_diff_max", "patents_fill_diff_max_ln")
pr2$gdp_fill_diff_max <- factor(pr2$gdp_fill_diff_max, levels(pr2$gdp_fill_diff_max)[c(3,2,1)])
quantiles <- c("5%", "95%")
pr2$patents_quantiles <- rep(quantiles, each=3)
(p2 <- ggplot(pr2, aes(x=gdp_fill_diff_max, 
                       y=Prediction, 
                       color=factor(patents_quantiles, levels=quantiles))) +
        geom_point(position=position_dodge(.3)) +
        ylab("Prediction (LSA value)") +
        scale_color_manual(name="Patents (diff):", values=c("blue", "red")) +
        scale_x_discrete("GDP difference (categorical)", labels=c("Small", "Medium", "Large")) + 
        geom_errorbar(aes(ymin=lower,ymax=upper), position=position_dodge(.3), width=0.2) +	
        theme_bw(base_size=14) +
        theme(legend.position = "none",
              axis.text.x=element_text(colour="black"),
              axis.text.y=element_text(colour="black")) +
        annotate(geom="text", label="Patents (diff.) (95%)", x=2.7, y=35.9,
                 color="red", size=5) +
        annotate(geom="text", label="Patents (diff.) (5%)", x=2.6, y=11.7,
                 color="blue", size=5) +
        NULL)
#Figure A1 in the Appendix
ggsave("int_plot_categorical.pdf", p2, device="pdf", dpi=600)


# Session info ------------------------------------------------------------


sessionInfo()
sink()
#writeLines(capture.output(sessionInfo()), "sessionInfo.txt")